Put the ipynb file and the HTML file in the GitHub branch you created in the last assignment, and submit the link to the commit in Brightspace.
from plotly.offline import init_notebook_mode
import plotly.io as pio
import plotly.express as px
# Initialise Plotly's notebook mode before any figure is displayed.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook - confirm against the Plotly documentation.
init_notebook_mode(connected=True)
# Set the default renderer that Plotly uses when a figure is shown
pio.renderers.default = "plotly_mimetype+notebook"
# Load the Gapminder sample dataset provided by Plotly Express
df = px.data.gapminder()
# Preview the first rows to see which columns are available
df.head()
| | country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num |
|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.445314 | AFG | 4 |
| 1 | Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.853030 | AFG | 4 |
| 2 | Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.100710 | AFG | 4 |
| 3 | Afghanistan | Asia | 1967 | 34.020 | 11537966 | 836.197138 | AFG | 4 |
| 4 | Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.981106 | AFG | 4 |
Recreate the barplot below that shows the population of different continents for the year 2007.
Hints:
# YOUR CODE HERE
# Keep only the rows for 2007. A boolean mask is enough; there is no need to
# group the whole frame by year just to read a single group back out.
data_2007 = df[df['year'] == 2007]
# Total population per continent. Selecting 'pop' explicitly avoids relying on
# the deprecated numeric_only default of DataFrameGroupBy.sum (which raises a
# FutureWarning) and never attempts to "sum" text columns such as country.
data_grouped = data_2007.groupby('continent', as_index=False)['pop'].sum()
# One bar per continent, coloured by continent
fig = px.bar(
    data_grouped,
    x='continent',
    y='pop',
    title='Total population per continent in 2007',
    color='continent',
)
# The x-axis already names each continent, so the legend is redundant
fig.update_layout(showlegend=False)
# Display the resulting chart
fig.show()
C:\Users\gwabe\AppData\Local\Temp\ipykernel_32136\3459147802.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
# Keep only the rows for 2007. A boolean mask is enough; there is no need to
# group the whole frame by year just to read a single group back out.
data_2007 = df[df['year'] == 2007]
# Total population per continent. Selecting 'pop' explicitly avoids relying on
# the deprecated numeric_only default of DataFrameGroupBy.sum (which raises a
# FutureWarning) and never attempts to "sum" text columns such as country.
data_grouped = data_2007.groupby('continent', as_index=False)['pop'].sum()
# One bar per continent, coloured by continent
fig = px.bar(
    data_grouped,
    x='continent',
    y='pop',
    title='Total population per continent in 2007',
    color='continent',
)
# The x-axis already names each continent, so the legend is redundant
fig.update_layout(showlegend=False)
# Order the continents along the x-axis by their bar totals, smallest first
fig.update_xaxes(categoryorder='total ascending')
# Display the resulting chart
fig.show()
C:\Users\gwabe\AppData\Local\Temp\ipykernel_32136\3132900638.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
Add text to each bar that shows its population.
# YOUR CODE HERE
# Keep only the rows for 2007
data_2007 = df[df['year'] == 2007]
# Total population per continent. Selecting 'pop' explicitly avoids relying on
# the deprecated numeric_only default of DataFrameGroupBy.sum (which raises a
# FutureWarning) and never attempts to "sum" text columns such as country.
data_grouped = data_2007.groupby('continent', as_index=False)['pop'].sum()
# One bar per continent, coloured by continent. The label is bound through the
# `text` argument so that each bar receives its own value: colouring by
# continent produces one trace per continent, and assigning a single text array
# to all traces afterwards would make every bar show the first entry.
fig = px.bar(
    data_grouped,
    x='continent',
    y='pop',
    text='pop',
    title='Total population per continent in 2007',
    color='continent',
)
# The x-axis already names each continent, so the legend is redundant
fig.update_layout(showlegend=False)
# Order the continents along the x-axis by their bar totals, smallest first
fig.update_xaxes(categoryorder='total ascending')
# Show the labels as whole numbers (no decimals) above the bars
fig.update_traces(texttemplate='%{text:.0f}', textposition='outside')
# Display the resulting chart
fig.show()
C:\Users\gwabe\AppData\Local\Temp\ipykernel_32136\4144510497.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
Thus far we have looked at data from one year (2007). Let's create an animation to see the population growth of the continents through the years.
# YOUR CODE HERE
# Select the data for all years
data_all_years = df[df['year'] > 1950]
# Take the year span for the title from the data itself so the title cannot
# drift from what is actually plotted.
first_year = data_all_years['year'].min()
last_year = data_all_years['year'].max()
# The bars show continent totals, so the fixed y-range has to cover the largest
# per-year continent total. Using the largest single-country value instead
# would cut off the taller bars.
max_continent_pop = (
    data_all_years.groupby(['year', 'continent'])['pop'].sum().max()
)
# Animated bar chart: px.histogram sums 'pop' per continent for every frame.
# text_auto labels each bar with its own aggregated value (no decimals) in
# every animation frame.
fig = px.histogram(
    data_all_years,
    x='continent',
    y='pop',
    color='continent',
    animation_frame='year',  # One animation frame per year
    text_auto='.0f',
    title=f'Population Growth per Continent ({first_year}-{last_year})',
    range_y=[0, max_continent_pop],  # Keep the y-axis fixed across frames
)
# Customize the layout of the chart
fig.update_layout(
    xaxis_title='Continent',
    yaxis_title='Population',
    showlegend=False,
)
# Put the labels inside the bars, in white for better visibility
fig.update_traces(
    textposition='inside',
    textfont=dict(color='white'),
)
# Display the resulting animated chart
fig.show()
Instead of the continents, let's look at individual countries. Create an animation that shows the population growth of the countries through the years.
# YOUR CODE HERE
# Select the data for all years
data_all_years = df[df['year'] > 1950]
# Take the year span for the title from the data itself so the title cannot
# drift from what is actually plotted.
first_year = data_all_years['year'].min()
last_year = data_all_years['year'].max()
# Animated bar chart with one bar per country. text_auto labels each bar with
# its own value (no decimals) in every frame; assigning the raw 'pop' column to
# all traces afterwards would not line up with the individual bars.
fig = px.histogram(
    data_all_years,
    x='country',
    y='pop',
    color='country',
    animation_frame='year',  # One animation frame per year
    text_auto='.0f',
    title=f'Population Growth per Country ({first_year}-{last_year})',
    range_y=[0, data_all_years['pop'].max()],  # Keep the y-axis fixed across frames
)
# Customize the layout of the chart
fig.update_layout(
    xaxis_title='Country',
    yaxis_title='Population',
    showlegend=False,
)
# Put the labels inside the bars, in white for better visibility
fig.update_traces(
    textposition='inside',
    textfont=dict(color='white'),
)
# Display the resulting animated chart
fig.show()
Clean up the country animation. Set the height of the figure to 1000 to get a better view of the animation.
# YOUR CODE HERE
# Select the data for all years
data_all_years = df[df['year'] > 1950]
# Take the year span for the title from the data itself so the title cannot
# drift from what is actually plotted.
first_year = data_all_years['year'].min()
last_year = data_all_years['year'].max()
# Animated bar chart with one bar per country. text_auto labels each bar with
# its own value (no decimals) in every frame; assigning the raw 'pop' column to
# all traces afterwards would not line up with the individual bars.
fig = px.histogram(
    data_all_years,
    x='country',
    y='pop',
    color='country',
    animation_frame='year',  # One animation frame per year
    text_auto='.0f',
    title=f'Population Growth per Country ({first_year}-{last_year})',
    range_y=[0, data_all_years['pop'].max()],  # Keep the y-axis fixed across frames
)
# Customize the layout of the chart
fig.update_layout(
    xaxis_title='Country',
    yaxis_title='Population',
    showlegend=False,
    height=1000,  # Taller figure so the many country bars are easier to read
)
# Put the labels inside the bars, in white for better visibility
fig.update_traces(
    textposition='inside',
    textfont=dict(color='white'),
)
# Display the resulting animated chart
fig.show()
# YOUR CODE HERE
# Select the data for all years
data_all_years = df[df['year'] > 1950]
# Rank the countries by their population summed over all years
country_populations = data_all_years.groupby('country')['pop'].sum().reset_index()
# Keep the ten countries with the largest summed population
top_10_countries = country_populations.sort_values(by='pop', ascending=False).head(10)
# Rows of the selected countries only; this is the data that is plotted
top_10_data = data_all_years[data_all_years['country'].isin(top_10_countries['country'])]
# Take the year span for the title from the data itself so the title cannot
# drift from what is actually plotted.
first_year = top_10_data['year'].min()
last_year = top_10_data['year'].max()
# Animated bar chart for the top 10 countries. The y-range is based on the
# largest single-year population that is plotted; the all-years sum used for
# the ranking is many times larger and would squash every bar.
# text_auto labels each bar with its own value (no decimals) in every frame.
fig = px.histogram(
    top_10_data,
    x='country',
    y='pop',
    color='country',
    animation_frame='year',  # One animation frame per year
    text_auto='.0f',
    title=f'Population Growth of Top 10 Countries ({first_year}-{last_year})',
    range_y=[0, top_10_data['pop'].max()],  # Keep the y-axis fixed across frames
)
# Customize the layout of the chart
fig.update_layout(
    xaxis_title='Country',
    yaxis_title='Population',
    showlegend=False,
)
# Put the labels inside the bars, in white for better visibility
fig.update_traces(
    textposition='inside',
    textfont=dict(color='white'),
)
# Display the resulting animated chart
fig.show()